Purpose

This document compares the data from forest and nursery populations of Phytophthora ramorum. Forest populations are isolated from Curry County, OR between 2001 and 2014. Nursery populations were isolated between 2000 and 2012 from both California and Oregon nurseries. Both populations are combined in a data set called for2nur.

Required packages and data

library(PramCurry)
## Loading required package: poppr
## Loading required package: adegenet
## Loading required package: ade4
##    ==========================
##     adegenet 1.4-2 is loaded
##    ==========================
## 
##  - to start, type '?adegenet'
##  - to browse adegenet website, type 'adegenetWeb()'
##  - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
## 
## 
## This is poppr version 1.1.2.99.70. To get started, type package?poppr
library(poppr)
library(reshape2)
library(ggplot2)
library(ape)
library(igraph)
## 
## Attaching package: 'igraph'
## 
## The following object is masked from 'package:ape':
## 
##     edges
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
options(stringsAsFactors = FALSE)
data(ramdat)
data(for2nur)
data(pop_data)
data(myPal)
data(comparePal)
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_0.2         igraph_0.7.1      ape_3.1-4.5       ggplot2_1.0.0    
## [5] reshape2_1.4      PramCurry_1.0     poppr_1.1.2.99-70 adegenet_1.4-2   
## [9] ade4_1.6-2       
## 
## loaded via a namespace (and not attached):
##  [1] assertthat_0.1      bitops_1.0-6        boot_1.3-11        
##  [4] caTools_1.17.1      colorspace_1.2-4    digest_0.6.4       
##  [7] evaluate_0.5.5      fastmatch_1.0-4     formatR_1.0        
## [10] gdata_2.13.3        ggmap_2.3           grid_3.1.2         
## [13] gtable_0.1.2        gtools_3.4.1        htmltools_0.2.6    
## [16] httpuv_1.3.0        knitr_1.6           labtools_0.4.1     
## [19] lattice_0.20-29     lubridate_1.3.3     mapproj_1.2-2      
## [22] maps_2.3-9          MASS_7.3-34         Matrix_1.1-4       
## [25] memoise_0.2.1       munsell_0.4.2       nlme_3.1-117       
## [28] parallel_3.1.2      pegas_0.6           permute_0.8-3      
## [31] phangorn_1.99-7     plotrix_3.5-7       plyr_1.8.1         
## [34] png_0.1-7           proto_0.3-10        Rcpp_0.11.2        
## [37] RgoogleMaps_1.2.0.6 rjson_0.2.14        RJSONIO_1.3-0      
## [40] rmarkdown_0.3.10    scales_0.2.4        shiny_0.10.1       
## [43] stringr_0.6.2       tools_3.1.2         vegan_2.0-10       
## [46] xtable_1.7-4        yaml_2.1.13

Data summary

for2nur
## 
## This is a genclone object
## -------------------------
## Genotype information:
## 
##     98 multilocus genotypes
##    729 diploid individuals
##      5 codominant loci
## 
## Population information:
## 
##      3 hierarchical levels - SOURCE YEAR STATE
##      9 populations defined - Nursery_CA Nursery_OR JHallCr_OR ... 
## Winchuck_OR ChetcoMain_OR PistolRSF_OR
summary(for2nur)
## 
##  # Total number of genotypes:  729 
## 
##  # Population sample sizes:  
##    Nursery_CA    Nursery_OR    JHallCr_OR NFChetHigh_OR      Coast_OR 
##           145            71           244           114            34 
##   HunterCr_OR   Winchuck_OR ChetcoMain_OR  PistolRSF_OR 
##            66            35            16             4 
## 
##  # Number of alleles per locus:  
## L1 L2 L3 L4 L5 
##  2  2  7  3 24 
## 
##  # Number of alleles per population:  
##  1  2  3  4  5  6  7  8  9 
## 24 25 22 24 19 13 17 15 12 
## 
##  # Percentage of missing data:  
## [1] 0
## 
##  # Observed heterozygosity:  
##     L1     L2     L3     L4     L5 
## 0.9575 0.9588 0.9986 0.9808 0.9890 
## 
##  # Expected heterozygosity:  
##     L1     L2     L3     L4     L5 
## 0.4992 0.4992 0.6186 0.5006 0.8467
knitr::kable(poppr(for2nur, quiet = TRUE))
Pop N MLG eMLG SE H G Hexp E.5 Ia rbarD File
Nursery_CA 145 30 6.524 1.2968 2.5878 7.555 0.8737 0.5329 0.1868 0.1983 for2nur
Nursery_OR 71 19 6.544 1.2210 2.4150 7.468 0.8785 0.6348 0.0302 0.0337 for2nur
JHallCr_OR 244 30 4.887 1.3345 1.9720 3.131 0.6834 0.3446 0.3322 0.1031 for2nur
NFChetHigh_OR 114 35 6.812 1.3423 2.7354 7.071 0.8662 0.4211 0.1426 0.0660 for2nur
Coast_OR 34 12 5.911 1.1693 2.0483 5.558 0.8449 0.6748 0.2514 0.2828 for2nur
HunterCr_OR 66 4 1.876 0.6904 0.4227 1.242 0.1977 0.4596 -0.0440 -0.0471 for2nur
Winchuck_OR 35 9 4.291 1.0722 1.4736 2.882 0.6723 0.5594 -0.0196 -0.0138 for2nur
ChetcoMain_OR 16 7 5.000 0.9258 1.4500 2.844 0.6917 0.5652 0.3748 0.3957 for2nur
PistolRSF_OR 4 2 2.000 0.0000 0.5623 1.600 0.5000 0.7949 0.0000 NaN for2nur
Total 729 98 7.920 1.2245 3.4860 15.041 0.9348 0.4436 0.2034 0.0651 for2nur
invisible(genotype_curve(for2nur, sample = 1000, quiet = TRUE))

plot of chunk summary_stats

DAPC

This analysis will attempt to explain the differentiation between all populations. Here, I am setting the hierarchy to be by Source (one of Nursery, JHallCr, NFChetHigh, Coast, HunterCr, Winchuck, ChetcoMain, PistolRSF) and State (Oregon or California).

Cross validation

Since we want to avoid overfitting the data, we utilize cross-validation, which performs DAPC on 90% of the data and attempts to predict where the 10% that was left out came from. This is doen 1000 times per number of principal components. The number of principal components with the lowest mean squared error and highest mean successful assignment is then used for the final analysis.

setpop(for2nur) <- ~SOURCE/STATE
set.seed(9001)
system.time(xval <- xvalDapc(for2nur@tab, pop(for2nur), n.pca = 5:15, 
                             result = "overall", n.rep = 1000))

plot of chunk ORCA_xval

## NULL
##    user  system elapsed 
## 235.488   2.424 239.619
xval[-1]
## $`Median and Confidence Interval for Random Chance`
##    2.5%     50%   97.5% 
## 0.08994 0.11070 0.13914 
## 
## $`Mean Successful Assignment by Number of PCs of PCA`
##      5      6      7      8      9     10     11     12     13     14 
## 0.6714 0.6809 0.7022 0.7221 0.7217 0.7385 0.7387 0.7368 0.7367 0.7472 
##     15 
## 0.7579 
## 
## $`Number of PCs Achieving Highest Mean Success`
## [1] "15"
## 
## $`Root Mean Squared Error by Number of PCs of PCA`
##      5      6      7      8      9     10     11     12     13     14 
## 0.3300 0.3208 0.2991 0.2792 0.2796 0.2630 0.2627 0.2647 0.2647 0.2542 
##     15 
## 0.2438 
## 
## $`Number of PCs Achieving Lowest MSE`
## [1] "15"
## 
## $DAPC
##  #################################################
##  # Discriminant Analysis of Principal Components #
##  #################################################
## class: dapc
## $call: dapc.data.frame(x = as.data.frame(x), grp = ..1, n.pca = ..2, 
##     n.da = ..3)
## 
## $n.pca: 15 first PCs of PCA used
## $n.da: 8 discriminant functions saved
## $var (proportion of conserved variance): 0.981
## 
## $eig (eigenvalues): 837 202.9 97.36 55.12 43.67 ...
## 
##   vector    length content                   
## 1 $eig      8      eigenvalues               
## 2 $grp      729    prior group assignment    
## 3 $prior    9      prior group probabilities 
## 4 $assign   729    posterior group assignment
## 5 $pca.cent 38     centring vector of PCA    
## 6 $pca.norm 38     scaling vector of PCA     
## 7 $pca.eig  33     eigenvalues of PCA        
## 
##   data.frame    nrow ncol
## 1 $tab          729  15  
## 2 $means        9    15  
## 3 $loadings     15   8   
## 4 $ind.coord    729  8   
## 5 $grp.coord    9    8   
## 6 $posterior    729  9   
## 7 $pca.loadings 38   15  
## 8 $var.contr    38   8   
##   content                                          
## 1 retained PCs of PCA                              
## 2 group means                                      
## 3 loadings of variables                            
## 4 coordinates of individuals (principal components)
## 5 coordinates of groups                            
## 6 posterior membership probabilities               
## 7 PCA loadings of original variables               
## 8 contribution of original variables

DAPC

for2nur.dapc <- dapc(for2nur, n.pca = 20, n.da = 8)
# comparePal <- char2pal(c("JHallCr_OR", "NFChetHigh_OR", "Coast_OR", 
#                          "HunterCr_OR", "Winchuck_OR", "ChetcoMain_OR", 
#                          "PistolRSF_OR"))
# (comparePal <- c(Nursery_CA = "#000000", Nursery_OR = "#808080", comparePal))
par(mfrow = c(1, 2))
scatter(for2nur.dapc, col = comparePal, cex = 2, legend = TRUE, clabel = FALSE,
        posi.leg = "bottomleft", scree.pca = TRUE, posi.pca = "topright",
        cleg = 0.75, xax = 1, yax = 2, inset.solid = 1)
scatter(for2nur.dapc, col = comparePal, cex = 2, legend = TRUE, clabel = FALSE,
        posi.leg = "bottomleft", inset.solid = 1,
        cleg = 0.75, xax = 3, yax = 2)

plot of chunk analyzing_ORCA

par(mfrow = c(1, 1))

# pdf("fig5scatter.pdf", width = 3.5, height = 3.5, pointsize = 10)
# scatter(for2nur.dapc, col = comparePal, cex = 2, legend = TRUE, clabel = FALSE,
#         posi.leg = "bottomleft", scree.pca = TRUE, posi.pca = "topright",
#         posi.da = "topleft", cleg = 0.75, xax = 1, yax = 2, inset.solid = 1, 
#         ratio.pca = 0.2, ratio.da = 0.2)
# dev.off()

summary(for2nur.dapc)
## $n.dim
## [1] 8
## 
## $n.pop
## [1] 9
## 
## $assign.prop
## [1] 0.7805
## 
## $assign.per.pop
##    Nursery_CA    Nursery_OR    JHallCr_OR NFChetHigh_OR      Coast_OR 
##        0.7379        0.8310        0.8689        0.5439        0.6765 
##   HunterCr_OR   Winchuck_OR ChetcoMain_OR  PistolRSF_OR 
##        0.9848        0.8571        0.6875        0.0000 
## 
## $prior.grp.size
## 
##    Nursery_CA    Nursery_OR    JHallCr_OR NFChetHigh_OR      Coast_OR 
##           145            71           244           114            34 
##   HunterCr_OR   Winchuck_OR ChetcoMain_OR  PistolRSF_OR 
##            66            35            16             4 
## 
## $post.grp.size
## 
##    Nursery_CA    Nursery_OR    JHallCr_OR NFChetHigh_OR      Coast_OR 
##           118            91           251            71            46 
##   HunterCr_OR   Winchuck_OR ChetcoMain_OR  PistolRSF_OR 
##            73            50            28             1

The above plots show the first three DA coordinates. DA coordinate 2 is on the y axis. Plot 1 contains DA coordinate 1 whereas plot 2 containd DA coordinate 3. Plot 2 can be thought of as plot 1 rotated about the x axis.

setpop(for2nur) <- ~SOURCE/STATE
plot_posterior(for2nur.dapc, for2nur, pal = comparePal) + theme(axis.text.x = element_text(size = 3))

plot of chunk posterior_ORCA

Allele contributions

loadingplot(for2nur.dapc$var.contr, axis = 1)

plot of chunk loadingplot

theLocus <- truenames(seploc(for2nur)[["PrMS43A1"]])$tab
theFreqs <- apply(theLocus, 2, function(e) tapply(e, pop(for2nur), mean, 
                                                  na.rm = TRUE))
colnames(theFreqs) <- sub("PrMS43A1.", "", colnames(theFreqs))
names(dimnames(theFreqs)) <- c("Region", "Allele")
ggplot(melt(theFreqs),
       aes(x = Allele, y = value, color = Region, group = Region)) +
  geom_vline(aes(xintercept = Allele), alpha = 0.5, linetype = 3) + 
  geom_point(aes(size = log(value)), show_guide = FALSE) + 
  geom_line() + 
  scale_color_manual(values = comparePal) + 
  facet_wrap(~Region, ncol = 1) + 
  geom_text(aes(label = ifelse(value >= 0.05, Allele, NA), y = value + 0.05), 
            color = "black", position = "jitter") + 
  scale_x_log10() + 
  labs(list(x = "Allele Frequency")) + 
  theme_bw()
## Warning: Removed 20 rows containing missing values (geom_text).
## Warning: Removed 19 rows containing missing values (geom_text).
## Warning: Removed 21 rows containing missing values (geom_text).
## Warning: Removed 19 rows containing missing values (geom_text).
## Warning: Removed 19 rows containing missing values (geom_text).
## Warning: Removed 22 rows containing missing values (geom_text).
## Warning: Removed 21 rows containing missing values (geom_text).
## Warning: Removed 19 rows containing missing values (geom_text).
## Warning: Removed 20 rows containing missing values (geom_text).

plot of chunk allele_contrib

Which MLGs cross pouplations?

setpop(for2nur) <- ~SOURCE/YEAR
invisible(source_year <- mlg.crosspop(for2nur))
## MLG.4: (5 inds) JHallCr_2001 JHallCr_2002 JHallCr_2003 JHallCr_2013
## MLG.5: (3 inds) NFChetHigh_2013 ChetcoMain_2013 Winchuck_2013
## MLG.7: (5 inds) Nursery_2006 Nursery_2008 NFChetHigh_2013
## MLG.12: (6 inds) NFChetHigh_2013 NFChetHigh_2014
## MLG.14: (43 inds) JHallCr_2003 NFChetHigh_2012 JHallCr_2013 NFChetHigh_2013 
## NFChetHigh_2014
## MLG.15: (5 inds) NFChetHigh_2013 NFChetHigh_2014
## MLG.16: (3 inds) JHallCr_2001 JHallCr_2003 NFChetHigh_2013
## MLG.17: (17 inds) JHallCr_2002 JHallCr_2003 JHallCr_2004 NFChetHigh_2012 
## JHallCr_2013 NFChetHigh_2013
## MLG.18: (8 inds) Winchuck_2012 Winchuck_2013 PistolRSF_2013
## MLG.20: (3 inds) JHallCr_2001 JHallCr_2003 JHallCr_2013
## MLG.21: (13 inds) Nursery_0 Nursery_2007 JHallCr_2001 JHallCr_2003 
## NFChetHigh_2012 JHallCr_2013 NFChetHigh_2013
## MLG.22: (149 inds) JHallCr_2001 JHallCr_2002 JHallCr_2003 JHallCr_2004 
## Coast_2011 JHallCr_2013 NFChetHigh_2013 ChetcoMain_2013 PistolRSF_2013 
## Coast_2013 NFChetHigh_2014
## MLG.23: (23 inds) JHallCr_2002 JHallCr_2003 Winchuck_2012 NFChetHigh_2013 
## Winchuck_2013
## MLG.24: (7 inds) JHallCr_2001 JHallCr_2003 Winchuck_2012 NFChetHigh_2013 
## ChetcoMain_2013 Winchuck_2013
## MLG.27: (11 inds) Nursery_2008 JHallCr_2001 JHallCr_2003 JHallCr_2013
## MLG.28: (25 inds) Nursery_2007 Nursery_2008 JHallCr_2002 JHallCr_2003 
## NFChetHigh_2003 JHallCr_2004 JHallCr_2013 NFChetHigh_2013 ChetcoMain_2014 
## JHallCr_2014
## MLG.29: (12 inds) Nursery_2011 JHallCr_2002 JHallCr_2003 JHallCr_2013 
## NFChetHigh_2013 Winchuck_2013 JHallCr_2014
## MLG.30: (2 inds) JHallCr_2005 NFChetHigh_2013
## MLG.33: (11 inds) Nursery_0 Nursery_2007 Nursery_2008 Nursery_2005 Coast_2010 
## Coast_2014
## MLG.36: (2 inds) NFChetHigh_2013 Winchuck_2013
## MLG.41: (2 inds) JHallCr_2001 JHallCr_2013
## MLG.46: (5 inds) Coast_2006 NFChetHigh_2013 Coast_2014
## MLG.51: (6 inds) Nursery_0 Nursery_2004 Nursery_2012 JHallCr_2003
## MLG.52: (15 inds) Nursery_2011 JHallCr_2004 Coast_2011 NFChetHigh_2013 
## Coast_2013 Coast_2014
## MLG.53: (17 inds) Coast_2012 NFChetHigh_2013 ChetcoMain_2013 Coast_2013 
## Coast_2014 NFChetHigh_2014
## MLG.54: (13 inds) NFChetHigh_2013 ChetcoMain_2013 Coast_2014
## MLG.55: (2 inds) Nursery_0 HunterCr_2011
## MLG.56: (6 inds) Nursery_0 Nursery_2008 Nursery_2012 Coast_2013
## MLG.57: (3 inds) Nursery_2011 Nursery_2012 ChetcoMain_2013
## MLG.59: (60 inds) Nursery_2007 HunterCr_2011
## MLG.67: (2 inds) JHallCr_2003 JHallCr_2004
## MLG.68: (20 inds) JHallCr_2002 JHallCr_2003 JHallCr_2004 JHallCr_2005
## MLG.69: (2 inds) JHallCr_2002 JHallCr_2003
## MLG.70: (3 inds) JHallCr_2003 JHallCr_2004
## MLG.1011: (34 inds) Nursery_0 Nursery_2007 Nursery_2006 Nursery_2008 
## Nursery_2004 Nursery_2005 Nursery_2010 Nursery_2011
## MLG.1013: (6 inds) Nursery_2007 Nursery_2006 Nursery_2008 Nursery_2009
## MLG.1014: (14 inds) Nursery_2007 Nursery_2006 Nursery_2008 Nursery_2010 
## Nursery_2011 Nursery_2012
## MLG.1016: (2 inds) Nursery_2007 Nursery_2005
## MLG.1021: (9 inds) Nursery_2008 Nursery_2012
## MLG.1025: (42 inds) Nursery_0 Nursery_2007 Nursery_2006 Nursery_2008 
## Nursery_2004 Nursery_2000 Nursery_2010 Nursery_2011 Nursery_2012
## MLG.1026: (27 inds) Nursery_2007 Nursery_2008 Nursery_2010 Nursery_2011
## MLG.1032: (8 inds) Nursery_0 Nursery_2003 Nursery_2010
## MLG.1037: (2 inds) Nursery_0 Nursery_2011
## MLG.1038: (8 inds) Nursery_0 Nursery_2007 Nursery_2011 Nursery_2012
source_year <- source_year[vapply(lapply(source_year, names), 
                           function(z) any(grepl("Nursery", z)), logical(1))]
symlg <- mlgFromString(names(source_year))
invisible(mlg.crosspop(for2nur, mlgsub = symlg[symlg < 1000]))
## MLG.7: (5 inds) Nursery_2006 Nursery_2008 NFChetHigh_2013
## MLG.21: (13 inds) Nursery_0 Nursery_2007 JHallCr_2001 JHallCr_2003 
## NFChetHigh_2012 JHallCr_2013 NFChetHigh_2013
## MLG.27: (11 inds) Nursery_2008 JHallCr_2001 JHallCr_2003 JHallCr_2013
## MLG.28: (25 inds) Nursery_2007 Nursery_2008 JHallCr_2002 JHallCr_2003 
## NFChetHigh_2003 JHallCr_2004 JHallCr_2013 NFChetHigh_2013 ChetcoMain_2014 
## JHallCr_2014
## MLG.29: (12 inds) Nursery_2011 JHallCr_2002 JHallCr_2003 JHallCr_2013 
## NFChetHigh_2013 Winchuck_2013 JHallCr_2014
## MLG.33: (11 inds) Nursery_0 Nursery_2007 Nursery_2008 Nursery_2005 Coast_2010 
## Coast_2014
## MLG.51: (6 inds) Nursery_0 Nursery_2004 Nursery_2012 JHallCr_2003
## MLG.52: (15 inds) Nursery_2011 JHallCr_2004 Coast_2011 NFChetHigh_2013 
## Coast_2013 Coast_2014
## MLG.55: (2 inds) Nursery_0 HunterCr_2011
## MLG.56: (6 inds) Nursery_0 Nursery_2008 Nursery_2012 Coast_2013
## MLG.57: (3 inds) Nursery_2011 Nursery_2012 ChetcoMain_2013
## MLG.59: (60 inds) Nursery_2007 HunterCr_2011
nursery_cross <- unique(sub("_.+?$", "", unlist(lapply(source_year, names))))
pops <- levels(gethierarchy(for2nur, ~SOURCE)$SOURCE)
pops[!pops %in% nursery_cross] # regions with no Nursery isolates.
## [1] "PistolRSF"

MSN

setpop(for2nur) <- ~SOURCE/STATE
for2nur
## 
## This is a genclone object
## -------------------------
## Genotype information:
## 
##     98 multilocus genotypes
##    729 diploid individuals
##      5 codominant loci
## 
## Population information:
## 
##      3 hierarchical levels - SOURCE YEAR STATE
##      9 populations defined - Nursery_CA Nursery_OR JHallCr_OR ... 
## Winchuck_OR ChetcoMain_OR PistolRSF_OR
# nf.msn <- bruvo.msn(for2nur, replen = other(ramdat)$REPLEN, include.ties = TRUE,
#                     showplot = FALSE)

newReps <- other(ramdat)$REPLEN
(newReps[3] <- 4) # Tetranucleotide repeat
## [1] 4
(newReps <- fix_replen(ramdat, newReps))
##  PrMS6A1  Pr9C3A1 PrMS39A1 PrMS45A1 PrMS43A1 
##        3        2        4        4        4
nf.msn <- bruvo.msn(for2nur, replen = newReps, include.ties = TRUE,
                    showplot = FALSE)

degs <- igraph::degree(nf.msn$graph)
names(degs) <- igraph::V(nf.msn$graph)$label
sort(degs)
## MLG.1035 MLG.1016 MLG.1040 MLG.1018 MLG.1019 MLG.1039 MLG.1033 MLG.1001 
##        1        1        1        1        1        1        1        1 
## MLG.1020 MLG.1036   MLG.26   MLG.19   MLG.65   MLG.64   MLG.58   MLG.43 
##        1        1        1        1        1        1        1        1 
##   MLG.40   MLG.11   MLG.44   MLG.42   MLG.38    MLG.9   MLG.62   MLG.10 
##        1        1        1        1        1        1        1        1 
##   MLG.31   MLG.61    MLG.1   MLG.63 MLG.1021 MLG.1015 MLG.1004 MLG.1012 
##        1        1        1        1        2        2        2        2 
## MLG.1029 MLG.1017 MLG.1002   MLG.69   MLG.67   MLG.66   MLG.25    MLG.5 
##        2        2        2        2        2        2        2        2 
##   MLG.13    MLG.2   MLG.45    MLG.8    MLG.7 MLG.1013 MLG.1037   MLG.55 
##        2        2        2        2        3        3        3        3 
## MLG.1038 MLG.1034   MLG.20   MLG.41   MLG.70   MLG.32    MLG.6   MLG.37 
##        3        3        3        3        3        3        3        3 
##   MLG.50   MLG.39 MLG.1032   MLG.51   MLG.59   MLG.56   MLG.57    MLG.4 
##        3        3        4        4        4        4        4        4 
##   MLG.24   MLG.16   MLG.68    MLG.3   MLG.46   MLG.54   MLG.36   MLG.15 
##        4        4        4        4        4        4        4        4 
##   MLG.35   MLG.47   MLG.34 MLG.1025   MLG.21 MLG.1014 MLG.1031   MLG.27 
##        4        4        4        5        5        5        5        5 
## MLG.1006   MLG.52   MLG.29   MLG.14   MLG.60   MLG.30   MLG.18   MLG.12 
##        5        5        5        5        5        5        5        5 
##   MLG.49 MLG.1011   MLG.33   MLG.23   MLG.53   MLG.48 MLG.1026   MLG.28 
##        5        6        6        6        6        6        7        7 
##   MLG.22   MLG.17 
##        7        7
edges_to_remove <- E(nf.msn$graph)[E(nf.msn$graph)$weight >= 0.06]
clusts <- clusters(delete.edges(nf.msn$graph, edges_to_remove))
names(clusts$membership) <- V(nf.msn$graph)$label

# clustPal  <- clusts$csize
# theClusts <- clustPal > 3
# clustOrder <- order(clustPal, decreasing = TRUE)
# clustPal[clustOrder][theClusts[clustOrder]]   <- RColorBrewer::brewer.pal(sum(theClusts), "Set1")
# clustPal[clustOrder][!theClusts[clustOrder]]  <- gray.colors(sum(!theClusts))
# nodeList <- lapply(1:length(clustPal), function(x) which(clusts$membership == x))

make_node_list <- function(clusts, pal, cutoff = 3){
  PAL <- match.fun(pal)
  clustPal  <- table(clusts$membership)
  theClusts <- clustPal > 3
  clustOrder <- order(clustPal, decreasing = TRUE)
  clustPal[clustOrder][theClusts[clustOrder]]   <- PAL(sum(theClusts))
  clustPal[clustOrder][!theClusts[clustOrder]]  <- gray.colors(sum(!theClusts))
  nodeList <- lapply(1:length(clustPal), function(x) which(clusts$membership == x))
  names(nodeList) <- clustPal
  return(nodeList)
}

clust_cutoff <- function(clusts, cutoff = 3){
  table(clusts$membership) > 3
}

nodeList <- make_node_list(clusts, 
                           function(x) RColorBrewer::brewer.pal(x, "Set1"),
                           cutoff = 3)
theClusts <- clust_cutoff(clusts, 3)

goodSeed <- 6
# goodSeed <- 17
# goodSeed <- 24
thisPal <- function(x) comparePal[nf.msn$populations]
# for (i in goodSeed:100){
#   set.seed(i)
  set.seed(goodSeed)
  MASTER <- get_layout(nf.msn$graph, LAYOUT = layout.fruchterman.reingold)
  plot_poppr_msn(for2nur, nf.msn, gad = 10, palette = thisPal, mlg = TRUE, 
                 layfun = MASTER, nodebase = 1.75, vertex.label.font = 2,
                 quantiles = FALSE, #inds = unique(ramdat@mlg),
                 vertex.label.color = "firebrick",
                 mark.groups = nodeList[theClusts], 
                 mark.border = names(nodeList)[theClusts],
                 mark.col = transp(names(nodeList)[theClusts], 0.05),
                 mark.expand = 2,
                 mark.shape = 0)

plot of chunk nursery_msn

#   prompt <- paste("save", i, "as seed?")
#   accept <- readline(prompt)
#   if (substr(accept, 1, 1) == "y"){
#     theSeed <- i
#     break
#   }
# }

# mainMLGs <- order(table(ramdat@mlg), decreasing = TRUE)[1:10]
# pdf("msn_nursery.pdf", width = 352/2.54/10, height = 352/2.54/10, pointsize = 40)
#   plot_poppr_msn(for2nur, nf.msn, gad = 10, palette = thisPal, mlg = TRUE, 
#                  layfun = MASTER, nodebase = 1.75, vertex.label.font = 2,
#                  quantiles = FALSE, inds = mainMLGs, nodelab = 1000,
#                  vertex.label.color = "firebrick",
#                  mark.groups = nodeList[theClusts], 
#                  mark.border = names(nodeList)[theClusts],
#                  mark.col = transp(names(nodeList)[theClusts], 0.05),
#                  mark.expand = 2,
#                  mark.shape = 0)
# dev.off()

Nursery Predictions

assignTheme <- theme(
  panel.grid.major.y = element_line(color = "gray75", linetype = 3), 
  panel.grid.minor.y = element_blank(),
  panel.grid.major.x = element_line(color = "gray75"),
  panel.grid.minor.x = element_blank(),#element_line(color = "gray75"),
  panel.background = element_blank(),
  panel.border = element_blank(),
  legend.key = element_blank(),
  axis.line = element_line(color = "black"),
  axis.text = element_text(color = "black"),
  axis.ticks = element_line(color = "black")
  )

nurlist <- c("Nursery_CA", "Nursery_OR")
z2.dapc <- dapc(popsub(for2nur, blacklist = nurlist, drop = FALSE), 
                n.pca = 12, n.da = 6)
nur <- popsub(for2nur, nurlist, drop = FALSE)
nurpred <- predict.dapc(z2.dapc, newdata = nur)
plot_posterior(nurpred, nur, pal = comparePal)

plot of chunk predicting_Nursery

ggsave("nursery_predictions_structure.png", width = 183, height = 183, 
       units = "mm", dpi = 300)
colMeans(nurpred$posterior)
##    JHallCr_OR NFChetHigh_OR      Coast_OR   HunterCr_OR   Winchuck_OR 
##      0.189817      0.152097      0.596483      0.033956      0.012044 
## ChetcoMain_OR  PistolRSF_OR 
##      0.014192      0.001411
nurpredmat <- matrix(c( 
  rowMeans(apply(nurpred$posterior, 1, function(x) x >= 0.95)),
  rowMeans(apply(nurpred$posterior, 1, function(x) x >= 0.99)),
  rowMeans(apply(nurpred$posterior, 1, function(x) x >= 0.999))),
  ncol = 3)
dimnames(nurpredmat) <- list(Population = colnames(nurpred$posterior),
                             `membership probability` = c("95%", "99%", "99.9%"))
signif(nurpredmat, 3)*100
##                membership probability
## Population        95%    99% 99.9%
##   JHallCr_OR     6.02  0.926 0.000
##   NFChetHigh_OR  0.00  0.000 0.000
##   Coast_OR      48.10 28.200 0.463
##   HunterCr_OR    3.24  3.240 3.240
##   Winchuck_OR    0.00  0.000 0.000
##   ChetcoMain_OR  1.39  0.926 0.926
##   PistolRSF_OR   0.00  0.000 0.000
mean(apply(nurpred$posterior, 1, function(x) all(x <= 0.6)))
## [1] 0.2176
assignmat <- matrix(c(summary(z2.dapc)$assign.per.pop, 
                      summary(for2nur.dapc)$assign.per.pop[-c(1:2)]), 
                    ncol = 2
                    )
dimnames(assignmat) <- list(Population = colnames(nurpred$posterior), 
                            c("Without Nursery", "With Nursery"))
assignmat
##                
## Population      Without Nursery With Nursery
##   JHallCr_OR             0.9016       0.8689
##   NFChetHigh_OR          0.5351       0.5439
##   Coast_OR               0.8529       0.6765
##   HunterCr_OR            1.0000       0.9848
##   Winchuck_OR            0.8857       0.8571
##   ChetcoMain_OR          0.6875       0.6875
##   PistolRSF_OR           0.0000       0.0000
noseb.gt.10 <- popsub(for2nur, blacklist = c("HunterCr_OR", "PistolRSF_OR"),
                drop = FALSE)
set.seed(9001)
system.time(noseb.dapcxval <- xvalDapc(noseb.gt.10@tab, pop(noseb.gt.10), 
                                       n.pca = 5:20, result = "overall", 
                                       n.rep = 1000))

plot of chunk nosebpistolXval

## NULL
##    user  system elapsed 
## 305.562   2.598 311.087
noseb.dapcxval[-1]
## $`Median and Confidence Interval for Random Chance`
##   2.5%    50%  97.5% 
## 0.1177 0.1426 0.1699 
## 
## $`Mean Successful Assignment by Number of PCs of PCA`
##      5      6      7      8      9     10     11     12     13     14 
## 0.6638 0.6934 0.6878 0.6890 0.7073 0.7277 0.7298 0.7274 0.7370 0.7528 
##     15     16     17     18     19     20 
## 0.7576 0.7621 0.7590 0.7571 0.7587 0.7567 
## 
## $`Number of PCs Achieving Highest Mean Success`
## [1] "16"
## 
## $`Root Mean Squared Error by Number of PCs of PCA`
##      5      6      7      8      9     10     11     12     13     14 
## 0.3404 0.3107 0.3165 0.3147 0.2971 0.2772 0.2746 0.2772 0.2679 0.2523 
##     15     16     17     18     19     20 
## 0.2469 0.2426 0.2460 0.2482 0.2467 0.2483 
## 
## $`Number of PCs Achieving Lowest MSE`
## [1] "16"
## 
## $DAPC
##  #################################################
##  # Discriminant Analysis of Principal Components #
##  #################################################
## class: dapc
## $call: dapc.data.frame(x = as.data.frame(x), grp = ..1, n.pca = ..2, 
##     n.da = ..3)
## 
## $n.pca: 16 first PCs of PCA used
## $n.da: 6 discriminant functions saved
## $var (proportion of conserved variance): 0.982
## 
## $eig (eigenvalues): 359.7 122.3 71.24 59.53 31.3 ...
## 
##   vector    length content                   
## 1 $eig      6      eigenvalues               
## 2 $grp      659    prior group assignment    
## 3 $prior    7      prior group probabilities 
## 4 $assign   659    posterior group assignment
## 5 $pca.cent 38     centring vector of PCA    
## 6 $pca.norm 38     scaling vector of PCA     
## 7 $pca.eig  32     eigenvalues of PCA        
## 
##   data.frame    nrow ncol
## 1 $tab          659  16  
## 2 $means        7    16  
## 3 $loadings     16   6   
## 4 $ind.coord    659  6   
## 5 $grp.coord    7    6   
## 6 $posterior    659  7   
## 7 $pca.loadings 38   16  
## 8 $var.contr    38   6   
##   content                                          
## 1 retained PCs of PCA                              
## 2 group means                                      
## 3 loadings of variables                            
## 4 coordinates of individuals (principal components)
## 5 coordinates of groups                            
## 6 posterior membership probabilities               
## 7 PCA loadings of original variables               
## 8 contribution of original variables
noseb.dapc <- dapc(noseb.gt.10, n.pca = 16, n.da = 6)
scatter(noseb.dapc, col = comparePal[noseb.gt.10@pop.names], cex = 2, 
        legend = TRUE, clabel = FALSE,
        posi.leg = "bottomleft", scree.pca = TRUE, posi.pca = "topright",
        cleg = 0.75, xax = 1, yax = 2, inset.solid = 1)

plot of chunk unnamed-chunk-3

Inertia ellipses represent 95% data.

sebdat <- popsub(for2nur, c("HunterCr_OR", "PistolRSF_OR"), drop = FALSE)
sebpred <- predict.dapc(noseb.dapc, sebdat)
plot_posterior(sebpred, sebdat, pal = comparePal)

plot of chunk sebpredscatter

scatter(noseb.dapc, col = comparePal[noseb.gt.10@pop.names], 
        cex = 2, scree.da = FALSE, cellipse = 2.5,
        legend = TRUE, clabel = FALSE,
        posi.leg = "bottomright", posi.pca = "topright",
        cleg = 0.75, xax = 1, yax = 2, inset.solid = 1)
par(xpd = TRUE)
sebpch <- c(HunterCr_OR = 15, PistolRSF_OR = 17)
points(sebpred$ind.scores[, 1], sebpred$ind.scores[, 2], 
       pch = sebpch[as.character(pop(sebdat))], 
       col = transp(comparePal[sebdat@pop.names], 0.2), 
       cex = 3)
legend("topright", legend = c("HunterCr", "PistolRSF"), pch = sebpch, 
       col = comparePal[sebdat@pop.names], cex = 0.75)
add.scatter.eig(noseb.dapc$eig, 15, 1, 2, posi="bottomleft", inset=.02)

plot of chunk sebpredscatter

sebpredmat <- matrix(c(
  rowMeans(apply(sebpred$posterior[1:66, ], 1, function(x) x >= 0.5)),
  rowMeans(apply(sebpred$posterior[1:66, ], 1, function(x) x >= 0.95)),
  rowMeans(apply(sebpred$posterior[1:66, ], 1, function(x) x >= 0.99)),
  rowMeans(apply(sebpred$posterior[1:66, ], 1, function(x) x >= 0.999))),
  ncol = 4)
dimnames(sebpredmat) <- list(Population = colnames(sebpred$posterior),
                             `membership probability` = c("50%", "95%", "99%", 
                                                          "99.9%"))
signif(sebpredmat, 3)
##                membership probability
## Population      50%   95%   99%  99.9%
##   Nursery_CA      1 0.924 0.924 0.0152
##   Nursery_OR      0 0.000 0.000 0.0000
##   JHallCr_OR      0 0.000 0.000 0.0000
##   NFChetHigh_OR   0 0.000 0.000 0.0000
##   Coast_OR        0 0.000 0.000 0.0000
##   Winchuck_OR     0 0.000 0.000 0.0000
##   ChetcoMain_OR   0 0.000 0.000 0.0000
scatter(z2.dapc, cex = 2, legend = TRUE, clabel = FALSE, 
        col = comparePal[names(z2.dapc$prior)], scree.pca = FALSE,
        posi.leg = "bottomleft", posi.pca = "topleft",
        cleg = 0.75, bg="white", scree.da=0, pch=20, 
        xlim = c(-4, 15), ylim = range(z2.dapc$ind.coord[, 2]) + c(-0.01, 0.01), 
        solid = 0.5)
par(xpd=TRUE)
nurpch <- c(Nursery_CA = 15, Nursery_OR = 17)
points(nurpred$ind.scores[, 1], nurpred$ind.scores[, 2], 
       pch = nurpch[as.character(pop(nur))], col = transp("black", 0.2), cex = 2)
legend("topright", legend = c("Nursery (CA)", "Nursery (OR)"), pch = nurpch,
       cex = 0.75)
add.scatter.eig(z2.dapc$eig, 15, 1, 2, posi="bottomright", inset=.02)

plot of chunk nurpredscatter

ggplot(melt(assignmat), aes(y = Population, x = value, shape = Var2)) + 
  geom_point(size = 4, alpha = 0.75) +
  labs(list(y = "Population", x = "rate of successful reassignment", 
           shape = "", 
           title = "Posterior values from DAPC with and without Nursery data")
       ) + assignTheme

plot of chunk unnamed-chunk-4

ggsave("reassignment_plot.png", width = 183, height = 61, units = "mm", 
       dpi = 300)
ggplot(melt(nurpredmat), 
       aes(y = Population, x = value, size = `membership probability`)) + 
  geom_point(alpha = 0.75) + 
  assignTheme + 
  scale_size_discrete(range = c(6, 3)) + 
  labs(list(x = "Percent successful assignment", 
            size = "membership\nprobability",
            title = "Posterior values for predictions of nursery genotypes"))

plot of chunk unnamed-chunk-4

ggsave("nursery_predictions.png", width = 183, height = 61, units = "mm",
       dpi = 300)

Trees

myMLG <- myPal
nurMLG <- char2pal(sort(unique(for2nur@mlg[for2nur@mlg > 70])), grey.colors)
names(nurMLG) <- paste("MLG", names(nurMLG), sep = ".")
myMLG <- c(myMLG, nurMLG)

date()
## [1] "Thu Nov 27 13:45:48 2014"
set.seed(5555)
system.time(try(for2nur.tree <- bruvo.boot(for2nur, 
                                           replen = newReps,#other(for2nur)$REPLEN,
                                           tree = "nj", showtree = FALSE,
                                           quiet = TRUE, sample = 100)))
## Warning: Some branch lengths of the tree are negative. Normalizing
## branches according to Kuhner and Felsenstein (1994)
##    user  system elapsed 
##  522.78    4.43  531.39
date()
## [1] "Thu Nov 27 13:54:39 2014"
for2nur.tree$tip.label <- paste("MLG", for2nur@mlg, sep = ".")
for2nur.tree <- ladderize(for2nur.tree)

# pdf("~/Downloads/testree.pdf", width = 12, height = 80)
par(mfrow = c(1, 2))

plot.phylo(for2nur.tree, cex = 0.8, font = 2, adj = 0, 
           label.offset = 1/800, main = "color by MLG",
           tip.col = myMLG[for2nur.tree$tip.label])
nodelabels(ifelse(for2nur.tree$node.label > 50, for2nur.tree$node.label, NA), 
           adj = c(1.3, -0.5), frame = "n", 
           cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)

plot.phylo(for2nur.tree, cex = 0.8, font = 2, adj = 0, 
           label.offset = 1/800, main = "color by Population",
           tip.col = comparePal[pop(for2nur)])
nodelabels(ifelse(for2nur.tree$node.label > 50, for2nur.tree$node.label, NA), 
           adj = c(1.3, -0.5), frame = "n", 
           cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)
legend("topright", legend = names(comparePal), fill = comparePal, bty = "n",
       border = NULL)

plot of chunk treemod

par(mfrow = c(1, 1))
# dev.off()
set.seed(5001)
system.time(mlg.tree <- bruvo.boot(clonecorrect(for2nur, hier = NA), 
                                   replen = newReps,#other(for2nur)$REPLEN, 
                                   tree = "nj", 
                                   sample = 1000, 
                                   quiet = TRUE, 
                                   showtree = FALSE))
##    user  system elapsed 
##  74.814   0.437  75.609

Unique MLG tree

When Everett asked about the Joe Hall outbreak, he asked if the genotypes that had jumped accross the drainage appeared to be different than the ones on the West side. Looking at this, I saw that genotype 51 had appeared in 2003. This was a genotype from the second largest group in the MSN that has been expanding in current years.

Looking at the genotypes, I noticed that the genotypes from that group had a different set of alleles at PrMS39. Plotting these, we can see that they segregate quite well on the tree (which makes sense as it partially contributes to the structure).

mlg.tree$tip.label <- paste("MLG", clonecorrect(for2nur, hier = NA)@mlg, sep = ".")
mlg.tree <- ladderize(mlg.tree)
gts <- genind2df(clonecorrect(for2nur, hier = NA), sep = "|", usepop = FALSE)[[3]]
gtPal <- char2pal(gts, function(x) RColorBrewer::brewer.pal(x, "Dark2"))
oldTips <- mlg.tree$tip.label
mlg.tree$tip.label <- paste(oldTips, gts)
# png("~/Downloads/newtree.png", width = 10, height = 15, units = "in", res = 300)
par(mfrow = c(1, 2))
plot.phylo(mlg.tree, cex = 0.55, font = 2,  adj = 0,
           label.offset = 1/800, 
           tip.col = gtPal[gts], main = "color by locus PrMS39")
nodelabels(ifelse(mlg.tree$node.label > 50, mlg.tree$node.label, NA), 
           adj = c(1.3, -0.5), frame = "n", 
           cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)


mlg.tree$tip.label <- oldTips
plot.phylo(mlg.tree, cex = 0.8, font = 2, adj = 0, 
           label.offset = 1/800,
           tip.col = names(nodeList)[clusts$membership], 
           main = "color by MSN cluster\nwith one mutational step")
nodelabels(ifelse(mlg.tree$node.label > 50, mlg.tree$node.label, NA), 
           adj = c(1.3, -0.5), frame = "n", 
           cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)

plot of chunk tree2plot

par(mfrow = c(1, 1))
# dev.off()
setpop(ramdat) <- ~Pop
# setpop(ramdat) <- ~ZONE2
lociAlleleFreqs <- lapply(seploc(ramdat), function(z){
  x <- apply(truenames(z)$tab, 2, 
             function(e) tapply(e, pop(ramdat), mean, na.rm = TRUE))
  names(dimnames(x)) <- c("Year", "Allele")
#   names(dimnames(x)) <- c("Region", "Allele")
  dimnames(x)[[2]] <- sub("Pr.+?A1.", "", dimnames(x)[[2]]) 
  return(x)
})

ramdatcc <- clonecorrect(ramdat, ~Pop)
lociAlleleFreqscc <- lapply(seploc(ramdatcc), function(z){
  x <- apply(truenames(z)$tab, 2, 
             function(e) tapply(e, pop(ramdatcc), mean, na.rm = TRUE))
  names(dimnames(x)) <- c("Year", "Allele")
  dimnames(x)[[2]] <- sub("Pr.+?A1.", "", dimnames(x)[[2]]) 
  return(x)
})

bad_years <- as.character(2005:2011)
lociAlleleFreqs <- melt(lociAlleleFreqs) %>% filter(!Year %in% bad_years)
lociAlleleFreqscc <- melt(lociAlleleFreqscc) %>% filter(!Year %in% bad_years)

ggplot(lociAlleleFreqs, aes(x = Year, y = value, color = value,
# lociAlleleFreqs <- melt(lociAlleleFreqs)
# ggplot(lociAlleleFreqs, aes(x = Year, y = value, color = value, 
                                    group = Allele)) + 
  geom_line() + 
  geom_text(aes(label = Allele), fontface = "bold") + 
  scale_x_continuous(breaks = as.numeric(ramdat@pop.names[c(1:4, 9:11)])) +
  facet_wrap(~L1) + 
  assignTheme +
  labs(list(title = "Allele frequencies in forest data",
            y = "allele frequency",
            color = "frequency"))

plot of chunk unnamed-chunk-5

ggplot(filter(lociAlleleFreqs, L1 == "PrMS39A1"), 
#        aes(x = Region, y = value, color = factor(Allele), group = Allele)) +       
       aes(x = Year, y = value, color = factor(Allele), group = Allele)) +
#   geom_area(aes(fill = factor(Allele)), stat = "identity", alpha = 0.75) +
  geom_line(aes(linetype = "one")) + 
  geom_line(data = filter(lociAlleleFreqscc, L1 == "PrMS39A1"), 
            aes(linetype = "two")) +
  geom_point(size = 2) + 
  geom_point(size = 2, data = filter(lociAlleleFreqscc, L1 == "PrMS39A1"), 
             pch = 1) + 
#   geom_bar(aes(fill = factor(Allele)), position = "fill", stat = "identity") + 
  scale_x_continuous(breaks = as.numeric(ramdat@pop.names[c(1:4, 9:11)])) +
  scale_linetype_manual(breaks = c("one", "two"), values = 1:2, 
                        name = "data",
                        guide = "legend",
                        labels = c("Full", "Clone\nCorrected")) + 
  assignTheme +
  labs(list(title = "Allele frequencies in PrMS39",
            y = "allele frequency",
            color = "allele")) +
  scale_color_brewer(type = "div", palette = "Set1")

plot of chunk unnamed-chunk-6

ggsave(filename = "PrMS39_alleles.png", width = 183, height = 88, units = "mm")
assoc <- matrix(c(for2nur.tree$tip.label, for2nur.tree$tip.label), ncol = 2)
cophyloplot(for2nur.tree, mlg.tree, assoc, use.edge.length = FALSE, space = 1000,
            show.tip.label = FALSE, 
            col = myMLG[assoc[, 1]], lwd = 3, gap = 1)

plot of chunk cophylo

Population Tree

Since DAPC showed Cape Sebastian isolates clustring together and we see that nursery isolates are clustering around the cape sebastian isolates in the MSN, it would be a good idea to include the Nursery data within the population tree from Nei’s distance.

neiboot <- function(x, sample = 1000, color_by = "black"){
  x <- genind2genpop(x, quiet = TRUE)
  outTree <- aboot(x, sample = sample, cutoff = 50, quiet = TRUE, tree = "nj",
                   showtree = FALSE)
  plot.phylo(outTree, xpd = TRUE, font = 2, cex = 0.8, type = "u",
             label.offset = 0.005, adj = 0, tip.col = color_by)
  nodelabels(outTree$node.label, xpd = TRUE, frame = "n", cex = 0.8, font = 3)#,
#              adj = c(1.3, -0.5))
#   axisPhylo(3)
  add.scale.bar(lwd = 5)
  return(outTree)
}
set.seed(5555)
source_state <- neiboot(for2nur, sample = 10000, 
        color_by = comparePal[for2nur@pop.names])
## Warning: Infinite values detected.

plot of chunk tree_state_pop

set.seed(5555)
source_statecc <- neiboot(clonecorrect(for2nur, ~SOURCE/STATE, combine = TRUE),
                          sample = 10000, 
                          color_by = comparePal[for2nur@pop.names])
## Warning: Infinite values detected.

plot of chunk tree_state_pop

set.seed(5555)
source_ <- neiboot(setpop(for2nur, ~SOURCE), sample = 10000, 
        color_by = comparePal[for2nur@pop.names[-2]])
## Warning: Infinite values detected.

plot of chunk tree_base_pop

set.seed(5555)
source_cc <- neiboot(clonecorrect(for2nur, ~SOURCE), sample = 10000,
        color_by = comparePal[for2nur@pop.names[-2]])
## Warning: Infinite values detected.

plot of chunk tree_base_pop

add.tip.color <- function(x, tip_colors){
  tips <- x$tip.label
  colors <- paste0("[&!color=", tip_colors, "]")
  x$tip.label <- paste0(tips, colors)
  return(x)
}

round.nodes <- function(x){
  x$node.label <- round(x$node.label)
  return(x)
}

mywrite.nexus <- function(x, tip_colors){
  theFile <- paste(substitute(x), "nursery.nex", sep = "_")
  x <- round.nodes(add.tip.color(x, tip_colors))
  write.nexus(x, file = theFile)
} 
mywrite.nexus(source_state, comparePal[for2nur@pop.names])
mywrite.nexus(source_statecc, comparePal[for2nur@pop.names])
mywrite.nexus(source_, comparePal[for2nur@pop.names[-2]])
mywrite.nexus(source_cc, comparePal[for2nur@pop.names[-2]])